Reguired package

library(nema)
library(iNEXT)
library(ggplot2)
library(reshape2)
library(vegan)
library(plyr)
library(viridis)
library(doBy)
library(cowplot)
library(knitr)
library(nlme)
library(MuMIn)

ggplot theme setting

large <- theme(legend.title = element_text(size=15),
        legend.text = element_text(size=15),
        axis.title = element_text(size=15),
        axis.text = element_text(size=15),
        strip.text = element_text(size=15))

rotate <- theme(axis.text.x = element_text(angle = 30, hjust=0.5))

no_strip <- theme(strip.background = element_rect(colour=NA, fill=NA),
                  strip.text = element_text(colour=NA))

Preparing nematode and environmental data

Relative abundance of nematode species from 100 randomly slected individuals was scaled to the total nematode abundance in each sample.

# Convert to species-by-sample matrix
b <- acast(nema_abund, Cruise+Station+Deployment+Tube+Subcore~Genus+Species, fill=0)
# Multiply the relative abundance by total abundance
#b <- ceiling(decostand(b, "total")*nema_total$Abundance)

# Match the trait data to the species-by-sample matrix
s <- nema_species[match(colnames(b), with(nema_species, paste(Genus, Species, sep="_"))),]

nema_cruise$Date <- factor(nema_cruise$Cruise, labels=c("2015-08", "2015-11"))
# Define depth zone
depth.bk <- c(200, 400, 600, 800, 1100)
depth.lab <- c("200-400", "400-600", "600-800", "800-1100")
nema_cruise$Zone <- cut(nema_cruise$Depth, breaks=depth.bk, labels=depth.lab)

Abundance

abu <- summaryBy(Abundance~Cruise+Station+Deployment, nema_total, FUN=c(mean, sd, length))
names(abu)[-1:-3] <- c("Abund", "sd", "n") 
id1 <- with(abu, paste(Cruise, Station, Deployment))
id2 <- with(nema_cruise, paste(Cruise, Station, Deployment))
abu <- cbind(abu, nema_cruise[match(id1, id2), -3:-5])

ggplot(data=abu, 
       aes(x=Depth, y=Abund, ymin=Abund-sd, ymax=Abund+sd, colour=Habitat))+
  geom_point(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), size=5,
                  position=position_jitter(width=10))+
  geom_errorbar()+
  labs(x="Depth (m)", y="Number of Individual")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  scale_y_log10()+
  theme_bw() %+replace% large

# Linear regression
hl <- splitBy(~Habitat, abu)
lapply(hl, FUN=function(x)summary(lm(log10(Abund)~Depth, data=x)))
## $Canyon
## 
## Call:
## lm(formula = log10(Abund) ~ Depth, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9773 -0.3273 -0.0743  0.5210  0.7714 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.7921647  0.5573392   3.216   0.0182 *
## Depth       -0.0001264  0.0008106  -0.156   0.8812  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6346 on 6 degrees of freedom
## Multiple R-squared:  0.004039,   Adjusted R-squared:  -0.162 
## F-statistic: 0.02433 on 1 and 6 DF,  p-value: 0.8812
## 
## 
## $Slope
## 
## Call:
## lm(formula = log10(Abund) ~ Depth, data = x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45214 -0.27533 -0.01458  0.20630  0.56582 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.7130420  0.3804968    7.13 0.000383 ***
## Depth       -0.0005616  0.0006242   -0.90 0.402926    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3833 on 6 degrees of freedom
## Multiple R-squared:  0.1189, Adjusted R-squared:  -0.02797 
## F-statistic: 0.8095 on 1 and 6 DF,  p-value: 0.4029
# Linear Mixed-Effects Model
f <- lme(fixed = log10(Abund) ~ Habitat*Depth, random = ~1|Cruise, data=abu, method = "REML", weights=varIdent(form=~1|Cruise))
summary(f)
## Linear mixed-effects model fit by REML
##  Data: abu 
##        AIC      BIC    logLik
##   62.31816 65.71251 -24.15908
## 
## Random effects:
##  Formula: ~1 | Cruise
##          (Intercept) Residual
## StdDev: 1.658777e-05 0.333253
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Cruise 
##  Parameter estimates:
## OR1_1114 OR1_1126 
## 1.000000 2.160647 
## Fixed effects: log10(Abund) ~ Habitat * Depth 
##                         Value Std.Error DF   t-value p-value
## (Intercept)         2.0621541 0.3771112 11  5.468292  0.0002
## HabitatSlope        0.2219863 0.5681727 11  0.390702  0.7035
## Depth              -0.0000840 0.0005511 11 -0.152347  0.8817
## HabitatSlope:Depth -0.0000132 0.0008887 11 -0.014875  0.9884
##  Correlation: 
##                    (Intr) HbttSl Depth 
## HabitatSlope       -0.664              
## Depth              -0.916  0.608       
## HabitatSlope:Depth  0.568 -0.925 -0.620
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -1.76092290 -0.74078613 -0.06355714  0.65721816  1.46392543 
## 
## Number of Observations: 16
## Number of Groups: 2
# Function to plot dianotics plot
dianostic_plot <- 
  function(f, y){
    # standardized residuals versus fitted values
    a1 <- plot(f, resid(., type = "p") ~ fitted(.) | Habitat, abline = 0)
    a2 <- plot(f, resid(., type = "p") ~ fitted(.) | Zone, abline = 0)
    a3 <- plot(f, resid(., type = "p") ~ fitted(.) | Cruise, abline = 0)
    a4 <- plot(f, resid(., type = "p") ~ fitted(.), abline = 0)
    # box-plots of residuals
    b1<-plot(f, Habitat ~ resid(.))
    b2 <-plot(f, Zone ~ resid(.))
    b3 <- plot(f, Cruise ~ resid(.))
    # observed versus fitted values
    c1<-plot(f, paste(paste(y, "fitted(.)", sep="~"), "Habitat", sep="|") %>% formula, abline = c(0,1))
    c2<-plot(f, paste(paste(y, "fitted(.)", sep="~"), "Zone", sep="|") %>% formula, abline = c(0,1))
    c3<-plot(f, paste(paste(y, "fitted(.)", sep="~"), "Cruise", sep="|") %>% formula, abline = c(0,1))
    c4<-plot(f, paste(y, "fitted(.)", sep="~") %>% formula, abline = c(0,1))
    # QQ plot
    d1<-qqnorm(f, ~ resid(., type = "p") | Habitat, abline = c(0,1))
    d2<-qqnorm(f, ~ resid(., type = "p") | Zone, abline = c(0,1))
    d3<-qqnorm(f, ~ resid(., type = "p") | Cruise, abline = c(0,1))
    d4<-qqnorm(f, ~ resid(., type = "p"), abline = c(0,1))
    
    print(a1, split=c(1,1,4,4), more=TRUE)
    print(a2, split=c(2,1,4,4), more=TRUE)
    print(a3, split=c(3,1,4,4), more=TRUE)
    print(a4, split=c(4,1,4,4), more=TRUE)
    print(b1, split=c(1,2,4,4), more=TRUE)
    print(b2, split=c(2,2,4,4), more=TRUE)
    print(b3, split=c(3,2,4,4), more=TRUE)
    #
    print(c1, split=c(1,3,4,4), more=TRUE)
    print(c2, split=c(2,3,4,4), more=TRUE)
    print(c3, split=c(3,3,4,4), more=TRUE)
    print(c4, split=c(4,3,4,4), more=TRUE)
    print(d1, split=c(1,4,4,4), more=TRUE)
    print(d2, split=c(2,4,4,4), more=TRUE)
    print(d3, split=c(3,4,4,4), more=TRUE)
    print(d4, split=c(4,4,4,4))
    }
dianostic_plot(f, y = "log10(Abund)")

# Adding time into linear model
f <- gls(log10(Abund) ~ Habitat+Depth+Date+Habitat:Depth+Habitat:Date+Depth:Date, data=abu, method = "REML")
summary(f)
## Generalized least squares fit by REML
##   Model: log10(Abund) ~ Habitat + Depth + Date + Habitat:Depth + Habitat:Date +      Depth:Date 
##   Data: abu 
##        AIC      BIC    logLik
##   65.98507 67.56286 -24.99253
## 
## Coefficients:
##                               Value Std.Error   t-value p-value
## (Intercept)               2.0408366 0.3827267  5.332361  0.0005
## HabitatSlope              0.2443469 0.4742909  0.515184  0.6188
## Depth                     0.0002088 0.0005502  0.379438  0.7132
## Date2015-11              -0.5344020 0.4855644 -1.100579  0.2996
## HabitatSlope:Depth       -0.0004650 0.0006934 -0.670630  0.5193
## HabitatSlope:Date2015-11  1.3884014 0.3374120  4.114855  0.0026
## Depth:Date2015-11        -0.0006078 0.0006736 -0.902312  0.3904
## 
##  Correlation: 
##                          (Intr) HbttSl Depth  D2015- HbtS:D HS:D20
## HabitatSlope             -0.578                                   
## Depth                    -0.899  0.471                            
## Date2015-11              -0.639  0.162  0.543                     
## HabitatSlope:Depth        0.438 -0.865 -0.487 -0.006              
## HabitatSlope:Date2015-11  0.278 -0.351 -0.067 -0.446 -0.004       
## Depth:Date2015-11         0.565 -0.050 -0.629 -0.873  0.012  0.119
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.66805349 -0.19198602  0.01729177  0.53830912  1.24208523 
## 
## Residual standard error: 0.3349952 
## Degrees of freedom: 16 total; 9 residual
dianostic_plot(f, y = "log10(Abund)")

# Pairwise tests on time in canyon
fp <- gls(log10(Abund) ~ Depth+Date+Depth:Date, data=subset(abu, Habitat=="Canyon"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: log10(Abund) ~ Depth + Date + Depth:Date 
##   Data: subset(abu, Habitat == "Canyon") 
##        AIC      BIC    logLik
##   42.60797 39.53944 -16.30399
## 
## Coefficients:
##                        Value Std.Error   t-value p-value
## (Intercept)        2.2037812 0.5354425  4.115813  0.0147
## Depth             -0.0000517 0.0007845 -0.065962  0.9506
## Date2015-11       -0.8537156 0.7526904 -1.134219  0.3201
## Depth:Date2015-11 -0.0001003 0.0010949 -0.091628  0.9314
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.916              
## Date2015-11       -0.711  0.652       
## Depth:Date2015-11  0.657 -0.717 -0.915
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.2199604 -0.1923151  0.1450373  0.4715691  0.7842177 
## 
## Residual standard error: 0.4284371 
## Degrees of freedom: 8 total; 4 residual
# Pairwise tests on time on slope
fp <- gls(log10(Abund) ~ Depth+Date+Depth:Date, data=subset(abu, Habitat=="Slope"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: log10(Abund) ~ Depth + Date + Depth:Date 
##   Data: subset(abu, Habitat == "Slope") 
##        AIC      BIC    logLik
##   35.83861 32.77008 -12.91931
## 
## Coefficients:
##                        Value Std.Error   t-value p-value
## (Intercept)        2.0497716 0.2917777  7.025115  0.0022
## Depth              0.0001569 0.0004786  0.327821  0.7595
## Date2015-11        1.3235342 0.4121029  3.211660  0.0325
## Depth:Date2015-11 -0.0014320 0.0006760 -2.118310  0.1015
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.935              
## Date2015-11       -0.708  0.662       
## Depth:Date2015-11  0.662 -0.708 -0.934
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.15538748 -0.48059120  0.03981799  0.47490170  1.13673861 
## 
## Residual standard error: 0.2075455 
## Degrees of freedom: 8 total; 4 residual
ggplot()+
  geom_errorbar(data=abu, aes(x=Zone, y=Abund, ymin=Abund, ymax=Abund+sd, fill=Date), position="dodge")+
  geom_bar(data=abu, aes(x=Zone, y=Abund, fill=Date), stat="identity", position="dodge", colour=gray(0, 0.2))+
  facet_wrap(~Habitat)+
  labs(x="Depth (m)", y="Number of Individual")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  scale_y_log10()+
  theme_bw() %+replace% large %+replace% theme(axis.text.x=element_text(angle=30))

Estimate Hill Numbers

q0 <- iNEXT(t(b), q = 0, size=1:200, nboot=1000)
q1 <- iNEXT(t(b), q = 1, size=1:200, nboot=1000)
q2 <- iNEXT(t(b), q = 2, size=1:200, nboot=1000)
save(list=c("q0", "q1", "q2"), file="../rda/HillNumbers.rda")
load("../rda/HillNumbers.rda")

h <- rbind(ldply(q0$iNextEst, .id="Sample"),
           ldply(q1$iNextEst, .id="Sample"),
           ldply(q2$iNextEst, .id="Sample"))

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=m, y=qD, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=m, y=qD, group=Sample))+
  geom_vline(xintercept = c(50, 100, 150), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Numbers of Individuals", y = "Effective Number of Species",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

h <- subset(h, !(order==2 & Sample == "OR1_1126_GC2_1_9_2" | Sample == "OR1_1126_GC2_1_9_3"))

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=m, y=qD, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=m, y=qD, group=Sample))+
  geom_vline(xintercept = c(50, 100, 150), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Numbers of Individuals", y = "Effective Number of Species",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

Fig. X. Size-based diversity accumulation curves based on species richness (left), exponential Shannon (middle) and inversed Simpson indices (right). The blue lines indicate the interpolated (rarefied) and red lines indicate extrapolated parts of the accumulation curves based on 1000 permutations. Dotted symbols indicate the observed diversity values. The Hill numbers of order q = 2 (inversed Simpson) for sample “OR1_1126_GC2_1_9_2” and “OR1_1126_GC2_1_9_3” can not be properly estimated and thus were eliminated.

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=m, y=SC, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=m, y=SC, group=Sample))+
  geom_vline(xintercept = c(50, 100, 150), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Numbers of Individuals", y = "Sample Coverage",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

Fig. X. Sample coverage based on species richness (left), exponential Shannon (middle) and inversed Simpson indices (right). The blue lines indicate the interpolated (rarefied) and red lines indicate extrapolated parts of the accumulation curves based on 1000 permutations. Dotted symbols indicate the observed diversity values. The Hill numbers of order q = 2 (inversed Simpson) for sample “OR1_1126_GC2_1_9_2” and “OR1_1126_GC2_1_9_3” can not be properly estimated and thus were eliminated.

# Minimum sample coverage for each order q
min(ldply(lapply(splitBy(~order+Sample, subset(h, order==0)), FUN=function(x)x[dim(x)[1],]))$SC)
## [1] 0.807
min(ldply(lapply(splitBy(~order+Sample, subset(h, order==1)), FUN=function(x)x[dim(x)[1],]))$SC)
## [1] 0.807
min(ldply(lapply(splitBy(~order+Sample, subset(h, order==2)), FUN=function(x)x[dim(x)[1],]))$SC)
## [1] 0.807
# SC = 0.807

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=SC, y=qD, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=SC, y=qD, group=Sample))+
  geom_vline(xintercept = c(0.807), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Sample Coverage", y = "Effective Number of Species",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

Fig. X. Coverage-based Diversity accumulation curves based on species richness (left), exponential Shannon (middle) and inversed Simpson indices (right) from surface. The blue lines indicate the interpolated (rarefied) and red lines indicate extrapolated parts of the accumulation curves based on 1000 permutations. Dotted symbols indicate the observed diversity values. The vertical dashed line (80.7%) shows the sample with the lowest sample coverage. The Hill numbers of order q = 2 (inversed Simpson) for sample “OR1_1126_GC2_1_9_2” and “OR1_1126_GC2_1_9_3” can not be properly estimated and thus were eliminated.

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=SC, y=m, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=SC, y=qD, group=Sample))+
  geom_vline(xintercept = c(0.807), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Sample Coverage", y = "Numbers of Individual",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

Fig. X. Sample size as a function of sample coverage based on species richness (left), exponential Shannon (middle) and inversed Simpson indices (right) from surface. The blue lines indicate the interpolated (rarefied) and red lines indicate extrapolated parts of the accumulation curves based on 1000 permutations. Dotted symbols indicate the observed diversity values. The vertical dashed line (80.7%) shows the sample with the lowest sample coverage. The Hill numbers of order q = 2 (inversed Simpson) for sample “OR1_1126_GC2_1_9_2” and “OR1_1126_GC2_1_9_3” can not be properly estimated and thus were eliminated.

# ~80.7% sample coverage
h0 <- lapply(q0$iNextEst, FUN=function(x) {
  d<-abs(x$SC-0.807)
  x[1:which(d==min(d)),]
  }) %>% ldply(.id="Sample")

h1 <- lapply(q1$iNextEst, FUN=function(x) {
  d<-abs(x$SC-0.807)
  x[1:which(d==min(d)),]
  }) %>% ldply(.id="Sample")

h2 <- lapply(q2$iNextEst, FUN=function(x) {
  d<-abs(x$SC-0.807)
  x[1:which(d==min(d)),]
  }) %>% ldply(.id="Sample")

h <- rbind(h0, h1, h2)
h <- subset(h, !(order==2 & Sample == "OR1_1126_GC2_1_9_3"))

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=m, y=qD, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=m, y=qD, group=Sample))+
  geom_vline(xintercept = c(50, 100, 150), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Numbers of Individuals", y = "Effective Number of Species",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

Fig. X. Size-based diversity accumulation curves based on species richness (left), exponential Shannon (middle) and inversed Simpson indices (right). The blue lines indicate the interpolated (rarefied) and red lines indicate extrapolated parts of the accumulation curves based on 1000 permutations. Dotted symbols indicate the observed diversity values. Sample size (numbers of individuals) and the Hill numbers are cut off by 80.7% sample coverage. The Hill numbers of order q = 2 (inversed Simpson) for sample “OR1_1126_GC2_1_9_3” can not be properly estimated and thus were eliminated.

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=SC, y=qD, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=SC, y=qD, group=Sample))+
  geom_vline(xintercept = c(0.807), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Sample Coverage", y = "Effective Number of Species",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

Fig. X. Coverage-based Diversity accumulation curves based on species richness (left), exponential Shannon (middle) and inversed Simpson indices (right) from surface. The blue lines indicate the interpolated (rarefied) and red lines indicate extrapolated parts of the accumulation curves based on 1000 permutations. Dotted symbols indicate the observed diversity values. Sample size (numbers of individuals) and the Hill numbers are cut off by 80.7% sample coverage. The Hill numbers of order q = 2 (inversed Simpson) for sample “OR1_1126_GC2_1_9_3” can not be properly estimated and thus were eliminated.

ggplot()+
  geom_line(data= subset(h, method!="observed"), 
            aes(x=SC, y=m, colour=method, group=Sample))+
  geom_point(data= subset(h, method=="observed"), 
             aes(x=SC, y=qD, group=Sample))+
  geom_vline(xintercept = c(0.807), linetype=3)+
  facet_wrap(~order, scale="free")+
  labs(x = "Sample Coverage", y = "Numbers of Individual",
       colour="Method")+
  theme_bw()%+replace% large %+replace% no_strip

Fig. X. Sample size as a function of sample coverage based on species richness (left), exponential Shannon (middle) and inversed Simpson indices (right) from surface. The blue lines indicate the interpolated (rarefied) and red lines indicate extrapolated parts of the accumulation curves based on 1000 permutations. Dotted symbols indicate the observed diversity values. Sample size (numbers of individuals) and the Hill numbers are cut off by 80.7% sample coverage. The Hill numbers of order q = 2 (inversed Simpson) for sample “OR1_1126_GC2_1_9_3” can not be properly estimated and thus were eliminated.

# ~80.7% sample coverage
h0 <- lapply(q0$iNextEst, FUN=function(x) {
  d<-abs(x$SC-0.807)
  o <- subset(x, d==min(d))
  o[dim(o)[1],]
  }) %>% ldply(.id="Sample")
round(mean(h0$SC), 3)
## [1] 0.808
h1 <- lapply(q1$iNextEst, FUN=function(x) {
  d<-abs(x$SC-0.807)
  o <- subset(x, d==min(d))
  o[dim(o)[1],]
  }) %>% ldply(.id="Sample")
round(mean(h1$SC), 3)
## [1] 0.808
h2 <- lapply(q2$iNextEst, FUN=function(x) {
  d<-abs(x$SC-0.807)
  o <- subset(x, d==min(d))
  o[dim(o)[1],]
  }) %>% ldply(.id="Sample")
# Set the bad diversity estimates to NA
h2[h2$Sample == "OR1_1126_GC2_1_9_3", c("qD", "qD.LCL", "qD.UCL", "SC", "SC.LCL", "SC.UCL")] <- NA
round(mean(h2$SC, na.rm=TRUE), 3)
## [1] 0.808
# 100 randomly selected individual
#h0 <- subset(ldply(q0$iNextEst, .id="Sample"), m==100)
#h1 <- subset(ldply(q1$iNextEst, .id="Sample"), m==100)
#h2 <- subset(ldply(q2$iNextEst, .id="Sample"), m==100)
# Set the bad diversity estimates to NA
#h2[h2$Sample == "OR1_1126_GC2_1_9_2"|h2$Sample == "OR1_1126_GC2_1_9_3", c("qD", "qD.LCL", "qD.UCL", "SC", "SC.LCL", "SC.UCL")] <- NA

Observed number of species

fr <- strsplit(as.character(q0$DataInfo$site), split="_") %>% ldply
fr <- cbind(paste(fr$V1, fr$V2, sep="_"), fr[,-1:-2])
names(fr) <- c("Cruise", "Station", "Deployment", "Tube", "Subcore")

div <- cbind(S.obs=q0$DataInfo$S.obs, d0=h0$qD, d1 = h1$qD, d2=h2$qD)
#row.names(div) <- NULL
div <- cbind(fr, div)
div <- summaryBy(S.obs+d0+d1+d2~Cruise+Station+Deployment, div, FUN=c(mean, sd, length))
names(div)[-1:-3] <- c("S.obs", "d0", "d1", "d2", "S.obs.sd", "d0.sd", "d1.sd", "d2.sd", "S.obs.n", "d0.n", "d1.n", "d2.n")

id1 <- with(div, paste(Cruise, Station, Deployment))
id2 <- with(nema_cruise, paste(Cruise, Station, Deployment))
div <- cbind(div, nema_cruise[match(id1, id2), -3:-5])
  
ggplot(data=div, 
       aes(x=Depth, y=S.obs, ymin=S.obs-S.obs.sd, ymax=S.obs+S.obs.sd, colour=Habitat))+
  geom_point(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), size=5)+
  geom_errorbar()+
  stat_smooth(data=subset(div, Habitat=="Slope"), 
       aes(x=Depth, y=S.obs, colour=Habitat), method="lm", fill="gray60")+
  labs(x="Depth (m)", y="Number of Species")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

# Linear regression
hl <- splitBy(~Habitat, div)
lapply(hl, FUN=function(x)summary(lm(S.obs~Depth, data=x)))
## $Canyon
## 
## Call:
## lm(formula = S.obs ~ Depth, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8867 -3.0501  0.0747  2.5394  5.5755 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.361518   3.419092   1.861    0.112
## Depth       0.005185   0.004973   1.043    0.337
## 
## Residual standard error: 3.893 on 6 degrees of freedom
## Multiple R-squared:  0.1534, Adjusted R-squared:  0.01231 
## F-statistic: 1.087 on 1 and 6 DF,  p-value: 0.3373
## 
## 
## $Slope
## 
## Call:
## lm(formula = S.obs ~ Depth, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3133 -1.1208 -0.4987  1.6849  3.6867 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.493078   2.751996   9.990 5.82e-05 ***
## Depth        0.019835   0.004515   4.394   0.0046 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.772 on 6 degrees of freedom
## Multiple R-squared:  0.7629, Adjusted R-squared:  0.7234 
## F-statistic:  19.3 on 1 and 6 DF,  p-value: 0.0046
# Linear Mixed-Effects Models
f <- lme(fixed = S.obs ~ Habitat*Depth, random = ~1|Cruise, data=div, method = "REML", weights=varIdent(form=~1|Cruise))
summary(f)
## Linear mixed-effects model fit by REML
##  Data: div 
##        AIC      BIC    logLik
##   101.0525 104.4469 -43.52627
## 
## Random effects:
##  Formula: ~1 | Cruise
##         (Intercept) Residual
## StdDev:    3.027131 2.453253
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Cruise 
##  Parameter estimates:
##  OR1_1114  OR1_1126 
## 1.0000000 0.8752995 
## Fixed effects: S.obs ~ Habitat * Depth 
##                        Value Std.Error DF  t-value p-value
## (Intercept)         6.422244 2.9335052 11 2.189273  0.0510
## HabitatSlope       21.558820 3.0274919 11 7.121017  0.0000
## Depth               0.004950 0.0029136 11 1.698860  0.1174
## HabitatSlope:Depth  0.014113 0.0047259 11 2.986190  0.0124
##  Correlation: 
##                    (Intr) HbttSl Depth 
## HabitatSlope       -0.453              
## Depth              -0.625  0.606       
## HabitatSlope:Depth  0.386 -0.925 -0.617
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.2995360 -0.6645470  0.1946463  0.5821909  1.4682874 
## 
## Number of Observations: 16
## Number of Groups: 2
dianostic_plot(f, y = "S.obs")

# Adding time into linear model
f <- gls(S.obs ~ Habitat+Depth+Date+Habitat:Depth+Habitat:Date++Depth:Date, data=div, method = "REML")
summary(f)
## Generalized least squares fit by REML
##   Model: S.obs ~ Habitat + Depth + Date + Habitat:Depth + Habitat:Date +      +Depth:Date 
##   Data: div 
##        AIC      BIC    logLik
##   99.06251 100.6403 -41.53125
## 
## Coefficients:
##                              Value Std.Error   t-value p-value
## (Intercept)               6.557582 2.4041616  2.727596  0.0233
## HabitatSlope             20.468462 2.9793376  6.870139  0.0001
## Depth                     0.009234 0.0034560  2.671857  0.0255
## Date2015-11              -0.664558 3.0501541 -0.217877  0.8324
## HabitatSlope:Depth        0.014415 0.0043559  3.309337  0.0091
## HabitatSlope:Date2015-11  1.591870 2.1195098  0.751056  0.4718
## Depth:Date2015-11        -0.007617 0.0042311 -1.800324  0.1053
## 
##  Correlation: 
##                          (Intr) HbttSl Depth  D2015- HbtS:D HS:D20
## HabitatSlope             -0.578                                   
## Depth                    -0.899  0.471                            
## Date2015-11              -0.639  0.162  0.543                     
## HabitatSlope:Depth        0.438 -0.865 -0.487 -0.006              
## HabitatSlope:Date2015-11  0.278 -0.351 -0.067 -0.446 -0.004       
## Depth:Date2015-11         0.565 -0.050 -0.629 -0.873  0.012  0.119
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.2736866 -0.6338234  0.3107286  0.4811254  1.2961868 
## 
## Residual standard error: 2.104328 
## Degrees of freedom: 16 total; 9 residual
dianostic_plot(f, y = "S.obs")

# Pairwise tests on time in canyon
fp <- gls(S.obs ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Canyon"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: S.obs ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Canyon") 
##       AIC      BIC    logLik
##   57.0701 54.00157 -23.53505
## 
## Coefficients:
##                       Value Std.Error    t-value p-value
## (Intercept)        7.343730  3.264492  2.2495781  0.0877
## Depth              0.007977  0.004783  1.6677342  0.1707
## Date2015-11       -2.205129  4.589011 -0.4805238  0.6560
## Depth:Date2015-11 -0.005169  0.006676 -0.7743248  0.4820
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.916              
## Date2015-11       -0.711  0.652       
## Depth:Date2015-11  0.657 -0.717 -0.915
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -0.95938901 -0.44721514 -0.03665273  0.46395559  1.05841249 
## 
## Residual standard error: 2.6121 
## Degrees of freedom: 8 total; 4 residual
# Pairwise tests on time on slope
fp <- gls(S.obs ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Slope"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: S.obs ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Slope") 
##        AIC      BIC    logLik
##   52.14384 49.07531 -21.07192
## 
## Coefficients:
##                       Value Std.Error   t-value p-value
## (Intercept)       25.890268  2.239807 11.559149  0.0003
## Depth              0.025642  0.003674  6.979079  0.0022
## Date2015-11        3.192644  3.163474  1.009221  0.3700
## Depth:Date2015-11 -0.011594  0.005190 -2.234158  0.0892
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.935              
## Date2015-11       -0.708  0.662       
## Depth:Date2015-11  0.662 -0.708 -0.934
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.2994989 -0.6148752  0.2452669  0.5886017  0.8089651 
## 
## Residual standard error: 1.593206 
## Degrees of freedom: 8 total; 4 residual
# Normalized enivronmental data to mean zero and unit variance
es <- as.data.frame(scale(div[, c(-1:-23, -45)]))

# General linear Models
dat <- cbind(div[, c(1:23, 45)], es)
f <- gls(S.obs ~ Speed+CN+Salinity+over20+TOC+transmissometer+Temperature, data=dat)
summary(f)
## Generalized least squares fit by REML
##   Model: S.obs ~ Speed + CN + Salinity + over20 + TOC + transmissometer +      Temperature 
##   Data: dat 
##        AIC      BIC    logLik
##   83.28878 84.00376 -32.64439
## 
## Coefficients:
##                     Value Std.Error   t-value p-value
## (Intercept)     24.208333  1.251815 19.338591  0.0000
## Speed           -3.717289  3.538928 -1.050400  0.3242
## CN               3.079616  2.025142  1.520691  0.1668
## Salinity        -1.448832  1.498685 -0.966735  0.3620
## over20           3.942649  2.832092  1.392133  0.2014
## TOC              8.802106  3.434023  2.563205  0.0335
## transmissometer  6.729000  2.560597  2.627902  0.0303
## Temperature      3.762024  1.498929  2.509808  0.0364
## 
##  Correlation: 
##                 (Intr) Speed  CN     Salnty over20 TOC    trnsms
## Speed            0.000                                          
## CN               0.000 -0.451                                   
## Salinity         0.000 -0.396  0.026                            
## over20           0.000 -0.442  0.266  0.318                     
## TOC              0.000  0.533 -0.703  0.012  0.036              
## transmissometer  0.000  0.212  0.378 -0.159  0.288 -0.324       
## Temperature      0.000 -0.035 -0.118 -0.109 -0.298  0.103 -0.296
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.3002762 -0.3893298 -0.1961969  0.4299275  1.6123160 
## 
## Residual standard error: 5.007259 
## Degrees of freedom: 16 total; 8 residual
dianostic_plot(f, y = "S.obs")

# Model selection
ms <- dredge(f)
kable(ms[1:10,])
(Intercept) CN over20 Salinity Speed Temperature TOC transmissometer df logLik AICc delta weight
113 24.20833 NA NA NA NA 4.216725 12.12952 4.559750 5 -42.16941 100.3388 0.0000000 0.19324692
115 24.20833 NA 3.017912 NA NA 3.647730 13.48186 5.763590 6 -39.62554 100.5844 0.2456061 0.17091489
117 24.20833 NA NA -2.015811 NA 4.447022 11.69344 4.783019 6 -39.86407 101.0615 0.7226485 0.13464538
121 24.20833 NA NA NA -1.535019 4.417978 11.43601 3.862356 6 -40.02829 101.3899 1.0510881 0.11425408
114 24.20833 1.33545 NA NA NA 4.058774 10.78467 5.481200 6 -40.36103 102.0554 1.7165751 0.08191493
57 24.20833 NA NA NA -4.179696 5.018420 12.21642 NA 5 -43.03906 102.0781 1.7392969 0.08098956
123 24.20833 NA 3.683155 NA -2.656831 3.870636 12.57963 4.821895 7 -37.21460 102.4292 2.0903937 0.06794998
99 24.20833 NA 5.007039 NA NA NA 12.87976 7.116377 5 -43.43141 102.8628 2.5240011 0.05470572
59 24.20833 NA 2.138806 NA -5.212667 4.787202 12.99310 NA 6 -40.78076 102.8949 2.5560405 0.05383633
119 24.20833 NA 2.379996 -1.771163 NA 3.970349 12.81285 5.705298 7 -37.57176 103.1435 2.8047016 0.04754221
# Model averaging
ma <- model.avg(ms, fit=TRUE)
kable(summary(ma)$coefmat.full)
Estimate Std. Error Adjusted SE z value Pr(>|z|)
(Intercept) 24.2083333 1.453335 1.630425 14.8478632 0.0000000
Temperature 3.4225605 2.267046 2.378835 1.4387549 0.1502200
TOC 11.5994722 3.889382 4.127685 2.8101638 0.0049516
transmissometer 4.3610824 3.457525 3.625125 1.2030158 0.2289702
over20 1.4109169 2.721876 2.888996 0.4883762 0.6252834
Salinity -0.4884575 1.168275 1.244343 0.3925425 0.6946574
Speed -1.3263665 3.039898 3.225864 0.4111663 0.6809506
CN 0.6249945 1.796200 1.890503 0.3305970 0.7409489
# Best model
b <- get.models(ms, subset=1)[[1]]
summary(b)
## Generalized least squares fit by REML
##   Model: S.obs ~ Temperature + TOC + transmissometer + 1 
##   Data: dat 
##        AIC      BIC    logLik
##   94.33882 96.76335 -42.16941
## 
## Coefficients:
##                     Value Std.Error   t-value p-value
## (Intercept)     24.208333  1.336118 18.118407  0.0000
## Temperature      4.216725  1.476597  2.855705  0.0145
## TOC             12.129516  2.073277  5.850409  0.0001
## transmissometer  4.559750  1.976900  2.306516  0.0397
## 
##  Correlation: 
##                 (Intr) Tmprtr TOC   
## Temperature      0.000              
## TOC              0.000  0.325       
## transmissometer  0.000 -0.128 -0.708
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.3399655 -0.6326702 -0.1417870  0.8225041  1.4135532 
## 
## Residual standard error: 5.344473 
## Degrees of freedom: 16 total; 12 residual
dianostic_plot(b, y = "S.obs")

kable(summary(b)$tTable)
Value Std.Error t-value p-value
(Intercept) 24.208333 1.336118 18.118407 0.0000000
Temperature 4.216725 1.476597 2.855705 0.0144687
TOC 12.129516 2.073277 5.850409 0.0000783
transmissometer 4.559750 1.976900 2.306516 0.0397261
# Relative importance
kable(summary(ma)$sw)
x
TOC 0.9634640
Temperature 0.7972715
transmissometer 0.7642366
over20 0.4149971
Speed 0.3984150
CN 0.2897839
Salinity 0.2727747
ggplot()+
  geom_errorbar(data=div, aes(x=Zone, y=S.obs, ymin=S.obs, ymax=S.obs+S.obs.sd, fill=Date), position="dodge")+
  geom_bar(data=div, aes(x=Zone, y=S.obs, fill=Date), stat="identity", position="dodge", colour=gray(0, 0.2))+
  facet_wrap(~Habitat)+
  labs(x="Depth (m)", y="Number of Species")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(axis.text.x=element_text(angle=30))

div$Abund <- abu$Abund
ggplot(data=div, aes(x=Abund, y=S.obs, colour=Habitat, shape=Date))+
  geom_point(size=5)+
  labs(x="Number of Individual", y="Number of Species")+
  scale_shape_manual(values=c(1, 19))+
  scale_x_log10()+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

q=0 or species richness

ggplot(data=div, 
       aes(x=Depth, y=d0, ymin=d0-d0.sd, ymax=d0+d0.sd, colour=Habitat))+
  geom_pointrange(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), fatten = 10, 
                  position=position_jitter(width=10))+
  stat_smooth(method="lm", fill="gray60")+
  labs(x="Depth (m)", y="Species Richness")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

# Linear regression
hl <- splitBy(~Habitat, div)
lapply(hl, FUN=function(x)summary(lm(d0~Depth, data=x)))
## $Canyon
## 
## Call:
## lm(formula = d0 ~ Depth, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8264 -1.0433  0.3256  0.6468  2.2704 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1.897619   1.280434   1.482  0.18885   
## Depth       0.008435   0.001862   4.529  0.00398 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.458 on 6 degrees of freedom
## Multiple R-squared:  0.7737, Adjusted R-squared:  0.736 
## F-statistic: 20.52 on 1 and 6 DF,  p-value: 0.003977
## 
## 
## $Slope
## 
## Call:
## lm(formula = d0 ~ Depth, data = x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.700 -1.224  1.172  1.754  3.034 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.049996   3.197072   4.707 0.003301 ** 
## Depth        0.044530   0.005245   8.491 0.000146 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.22 on 6 degrees of freedom
## Multiple R-squared:  0.9232, Adjusted R-squared:  0.9104 
## F-statistic: 72.09 on 1 and 6 DF,  p-value: 0.000146
# Linear Mixed-Effects Models
f <- lme(fixed = d0 ~ Habitat*Depth, random = ~1|Cruise, data=div, method = "REML", weights=varIdent(form=~1|Cruise))
summary(f)
## Linear mixed-effects model fit by REML
##  Data: div 
##        AIC     BIC    logLik
##   95.12905 98.5234 -40.56453
## 
## Random effects:
##  Formula: ~1 | Cruise
##         (Intercept)  Residual
## StdDev:   0.7502995 0.8519262
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Cruise 
##  Parameter estimates:
## OR1_1114 OR1_1126 
## 1.000000 4.116406 
## Fixed effects: d0 ~ Habitat * Depth 
##                        Value Std.Error DF   t-value p-value
## (Intercept)         2.432110 1.2180920 11  1.996655  0.0712
## HabitatSlope       14.378457 1.5565959 11  9.237116  0.0000
## Depth               0.006473 0.0015136 11  4.276727  0.0013
## HabitatSlope:Depth  0.037726 0.0024361 11 15.486255  0.0000
##  Correlation: 
##                    (Intr) HbttSl Depth 
## HabitatSlope       -0.564              
## Depth              -0.779  0.609       
## HabitatSlope:Depth  0.484 -0.926 -0.621
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.9654755 -0.5668793 -0.1394362  0.4205499  1.3642021 
## 
## Number of Observations: 16
## Number of Groups: 2
dianostic_plot(f, y = "d0")

# Adding time into linear model
f <- gls(d0 ~ Habitat+Depth+Date+Habitat:Depth+Habitat:Date++Depth:Date, data=div, method = "REML")
summary(f)
## Generalized least squares fit by REML
##   Model: d0 ~ Habitat + Depth + Date + Habitat:Depth + Habitat:Date +      +Depth:Date 
##   Data: div 
##        AIC      BIC    logLik
##   98.07853 99.65633 -41.03927
## 
## Coefficients:
##                              Value Std.Error   t-value p-value
## (Intercept)               2.411364 2.2762652  1.059351  0.3170
## HabitatSlope             15.587396 2.8208429  5.525794  0.0004
## Depth                     0.006908 0.0032721  2.111228  0.0639
## Date2015-11              -0.961399 2.8878922 -0.332907  0.7468
## HabitatSlope:Depth        0.036147 0.0041242  8.764799  0.0000
## HabitatSlope:Date2015-11 -4.925174 2.0067562 -2.454296  0.0365
## Depth:Date2015-11         0.002931 0.0040060  0.731674  0.4830
## 
##  Correlation: 
##                          (Intr) HbttSl Depth  D2015- HbtS:D HS:D20
## HabitatSlope             -0.578                                   
## Depth                    -0.899  0.471                            
## Date2015-11              -0.639  0.162  0.543                     
## HabitatSlope:Depth        0.438 -0.865 -0.487 -0.006              
## HabitatSlope:Date2015-11  0.278 -0.351 -0.067 -0.446 -0.004       
## Depth:Date2015-11         0.565 -0.050 -0.629 -0.873  0.012  0.119
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.89052131 -0.34180964  0.07145406  0.39814586  1.30604188 
## 
## Residual standard error: 1.992382 
## Degrees of freedom: 16 total; 9 residual
dianostic_plot(f, y = "d0")

# Pairwise tests on time in canyon
fp <- gls(d0 ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Canyon"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: d0 ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Canyon") 
##        AIC      BIC    logLik
##   52.34981 49.28128 -21.17491
## 
## Coefficients:
##                        Value Std.Error    t-value p-value
## (Intercept)        2.8459289 1.8095318  1.5727432  0.1909
## Depth              0.0062134 0.0026513  2.3435136  0.0791
## Date2015-11       -1.8129926 2.5437225 -0.7127321  0.5154
## Depth:Date2015-11  0.0042844 0.0037004  1.1578249  0.3114
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.916              
## Date2015-11       -0.711  0.652       
## Depth:Date2015-11  0.657 -0.717 -0.915
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.5972449 -0.2108113  0.2470843  0.5144718  0.6367311 
## 
## Residual standard error: 1.447906 
## Degrees of freedom: 8 total; 4 residual
# Pairwise tests on time on slope
fp <- gls(d0 ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Slope"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: d0 ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Slope") 
##        AIC      BIC    logLik
##   55.99702 52.92849 -22.99851
## 
## Coefficients:
##                       Value Std.Error   t-value p-value
## (Intercept)       17.370929  3.625665  4.791101  0.0087
## Depth              0.044158  0.005948  7.424508  0.0018
## Date2015-11       -4.634348  5.120841 -0.904997  0.4166
## Depth:Date2015-11  0.000733  0.008400  0.087228  0.9347
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.935              
## Date2015-11       -0.708  0.662       
## Depth:Date2015-11  0.662 -0.708 -0.934
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.40928507 -0.28776079 -0.05151208  0.43550690  1.12737312 
## 
## Residual standard error: 2.578986 
## Degrees of freedom: 8 total; 4 residual
# Normalized enivronmental data to mean zero and unit variance
es <- as.data.frame(scale(div[, c(-1:-23, -45)]))

# General linear Models
dat <- cbind(div[, c(1:23, 45:46)], es)
f <- gls(d0 ~ Speed+CN+Salinity+over20+TOC+transmissometer+Temperature, data=dat)
summary(f)
## Generalized least squares fit by REML
##   Model: d0 ~ Speed + CN + Salinity + over20 + TOC + transmissometer +      Temperature 
##   Data: dat 
##        AIC      BIC    logLik
##   88.18052 88.89549 -35.09026
## 
## Coefficients:
##                     Value Std.Error   t-value p-value
## (Intercept)     23.811083  1.699489 14.010729  0.0000
## Speed            0.256441  4.804520  0.053375  0.9587
## CN               0.409696  2.749375  0.149014  0.8852
## Salinity        -1.749131  2.034646 -0.859674  0.4150
## over20           5.977026  3.844906  1.554531  0.1587
## TOC             14.983141  4.662100  3.213818  0.0124
## transmissometer  9.429508  3.476319  2.712498  0.0266
## Temperature      0.838507  2.034976  0.412048  0.6911
## 
##  Correlation: 
##                 (Intr) Speed  CN     Salnty over20 TOC    trnsms
## Speed            0.000                                          
## CN               0.000 -0.451                                   
## Salinity         0.000 -0.396  0.026                            
## over20           0.000 -0.442  0.266  0.318                     
## TOC              0.000  0.533 -0.703  0.012  0.036              
## transmissometer  0.000  0.212  0.378 -0.159  0.288 -0.324       
## Temperature      0.000 -0.035 -0.118 -0.109 -0.298  0.103 -0.296
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.3449936 -0.5102668  0.1197368  0.6670029  0.9405259 
## 
## Residual standard error: 6.797957 
## Degrees of freedom: 16 total; 8 residual
dianostic_plot(f, y = "d0")

# Model selection
ms <- dredge(f)
kable(ms[1:10,])
(Intercept) CN over20 Salinity Speed Temperature TOC transmissometer df logLik AICc delta weight
99 23.81108 NA 6.931315 NA NA NA 15.83907 9.309679 5 -42.75189 101.5038 0.000000 0.30238629
107 23.81108 NA 7.168435 NA -0.8007028 NA 15.55607 9.050789 6 -40.59219 102.5177 1.013915 0.18213492
103 23.81108 NA 6.539584 -1.501806 NA NA 15.22665 9.361702 6 -40.92467 103.1827 1.678893 0.13061562
100 23.81108 0.1859984 6.947268 NA NA NA 15.67032 9.440106 6 -41.09199 103.5173 2.013522 0.11049213
115 23.81108 NA 6.602599 NA NA 0.6028105 15.93857 9.086122 6 -41.21752 103.7684 2.264584 0.09745721
111 23.81108 NA 6.271456 -1.663113 0.7633273 NA 15.43066 9.614095 7 -38.65646 105.3129 3.809122 0.04502168
108 23.81108 0.5729218 7.352925 NA -1.2577530 NA 14.87475 9.304760 7 -38.72738 105.4548 3.950978 0.04193901
123 23.81108 NA 6.856693 NA -1.0147939 0.6879508 15.59396 8.726435 7 -38.99235 105.9847 4.480911 0.03217694
105 23.81108 NA NA NA 1.8957498 NA 13.10666 7.586939 5 -45.05021 106.1004 4.596635 0.03036795
109 23.81108 NA NA -2.754858 3.9275903 NA 13.40662 8.823438 6 -42.48609 106.3055 4.801723 0.02740824
# Model averaging
ma <- model.avg(ms, fit=TRUE)
kable(summary(ma)$coefmat.full)
Estimate Std. Error Adjusted SE z value Pr(>|z|)
(Intercept) 23.8110833 1.602674 1.796915 13.2510889 0.0000000
over20 5.3403977 3.862206 4.097446 1.3033478 0.1924561
TOC 15.1312150 3.583217 3.919393 3.8606016 0.0001131
transmissometer 8.4372472 3.428524 3.683902 2.2903017 0.0220038
Speed -0.0672683 2.588999 2.845175 0.0236429 0.9811374
Salinity -0.4731815 1.235211 1.325075 0.3570980 0.7210184
CN -0.0121840 1.351921 1.485588 0.0082015 0.9934563
Temperature 0.2563924 1.078765 1.176144 0.2179941 0.8274337
# Best model
b <- get.models(ms, subset=1)[[1]]
summary(b)
## Generalized least squares fit by REML
##   Model: d0 ~ over20 + TOC + transmissometer + 1 
##   Data: dat 
##        AIC      BIC    logLik
##   95.50379 97.92832 -42.75189
## 
## Coefficients:
##                     Value Std.Error   t-value p-value
## (Intercept)     23.811083  1.466034 16.241836  0.0000
## over20           6.931315  2.755371  2.515566  0.0271
## TOC             15.839069  2.606623  6.076471  0.0001
## transmissometer  9.309678  2.376799  3.916898  0.0020
## 
##  Correlation: 
##                 (Intr) over20 TOC   
## over20           0.000              
## TOC              0.000  0.565       
## transmissometer  0.000  0.425 -0.291
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.04570269 -0.60567852 -0.03575414  0.79895824  1.23772486 
## 
## Residual standard error: 5.864136 
## Degrees of freedom: 16 total; 12 residual
dianostic_plot(b, y = "d0")

kable(summary(b)$tTable)
Value Std.Error t-value p-value
(Intercept) 23.811083 1.466034 16.241836 0.0000000
over20 6.931315 2.755371 2.515566 0.0271292
TOC 15.839070 2.606623 6.076471 0.0000553
transmissometer 9.309679 2.376799 3.916898 0.0020466
# Relative importance
kable(summary(ma)$sw)
x
TOC 0.9940583
transmissometer 0.9473144
over20 0.7958682
Speed 0.3466504
Salinity 0.2659241
CN 0.2360960
Temperature 0.2172008
ggplot()+
  geom_errorbar(data=div, aes(x=Zone, y=d0, ymin=d0, ymax=d0+d0.sd, fill=Date), position="dodge")+
  geom_bar(data=div, aes(x=Zone, y=d0, fill=Date), stat="identity", position="dodge", colour=gray(0, 0.2))+
  facet_wrap(~Habitat)+
  labs(x="Depth (m)", y="Species Richness")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(axis.text.x=element_text(angle=30))

ggplot(data=div, aes(x=Abund, y=d0, colour=Habitat, shape=Date))+
  geom_point(size=5)+
  labs(x="Number of Individual", y="Species Richness")+
  scale_shape_manual(values=c(1, 19))+
  scale_x_log10()+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

q=1 or exponential Shannon diversity

ggplot(data=div, 
       aes(x=Depth, y=d1, ymin=d1-d1.sd, ymax=d1+d1.sd, colour=Habitat))+
  geom_pointrange(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), fatten = 10, 
                  position=position_jitter(width=10))+
  stat_smooth(method="lm", fill="gray60")+
  labs(x="Depth (m)", y=expression(exp(Shannon)))+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

# Linear regression
hl <- splitBy(~Habitat, div)
lapply(hl, FUN=function(x)summary(lm(d0~Depth, data=x)))
## $Canyon
## 
## Call:
## lm(formula = d0 ~ Depth, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8264 -1.0433  0.3256  0.6468  2.2704 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1.897619   1.280434   1.482  0.18885   
## Depth       0.008435   0.001862   4.529  0.00398 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.458 on 6 degrees of freedom
## Multiple R-squared:  0.7737, Adjusted R-squared:  0.736 
## F-statistic: 20.52 on 1 and 6 DF,  p-value: 0.003977
## 
## 
## $Slope
## 
## Call:
## lm(formula = d0 ~ Depth, data = x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.700 -1.224  1.172  1.754  3.034 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.049996   3.197072   4.707 0.003301 ** 
## Depth        0.044530   0.005245   8.491 0.000146 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.22 on 6 degrees of freedom
## Multiple R-squared:  0.9232, Adjusted R-squared:  0.9104 
## F-statistic: 72.09 on 1 and 6 DF,  p-value: 0.000146
# Linear Mixed-Effects Models
f <- lme(fixed = d1 ~ Habitat*Depth, random = ~1|Cruise, data=div, method = "REML", weights=varIdent(form=~1|Cruise))
summary(f)
## Linear mixed-effects model fit by REML
##  Data: div 
##        AIC      BIC    logLik
##   98.85746 102.2518 -42.42873
## 
## Random effects:
##  Formula: ~1 | Cruise
##         (Intercept)  Residual
## StdDev:    1.285402 0.8335872
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Cruise 
##  Parameter estimates:
## OR1_1114 OR1_1126 
## 1.000000 5.295347 
## Fixed effects: d1 ~ Habitat * Depth 
##                        Value Std.Error DF   t-value p-value
## (Intercept)         1.785909 1.4785387 11  1.207888  0.2524
## HabitatSlope       11.507348 1.5403980 11  7.470373  0.0000
## Depth               0.004960 0.0014985 11  3.310061  0.0070
## HabitatSlope:Depth  0.027269 0.0024110 11 11.310264  0.0000
##  Correlation: 
##                    (Intr) HbttSl Depth 
## HabitatSlope       -0.460              
## Depth              -0.635  0.609       
## HabitatSlope:Depth  0.395 -0.926 -0.622
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.00184011 -0.63342814  0.07772485  0.43123366  1.19754403 
## 
## Number of Observations: 16
## Number of Groups: 2
dianostic_plot(f, y = "d1")

# Adding time into linear model
f <- gls(d1 ~ Habitat+Depth+Date+Habitat:Depth+Habitat:Date++Depth:Date, data=div, method = "REML")
summary(f)
## Generalized least squares fit by REML
##   Model: d1 ~ Habitat + Depth + Date + Habitat:Depth + Habitat:Date +      +Depth:Date 
##   Data: div 
##        AIC      BIC    logLik
##   95.67533 97.25313 -39.83767
## 
## Coefficients:
##                              Value Std.Error   t-value p-value
## (Intercept)               0.704178 1.9917725  0.353543  0.7318
## HabitatSlope             15.677256 2.4682878  6.351470  0.0001
## Depth                     0.007647 0.0028631  2.670671  0.0256
## Date2015-11               3.442971 2.5269571  1.362497  0.2062
## HabitatSlope:Depth        0.020641 0.0036087  5.719769  0.0003
## HabitatSlope:Date2015-11 -7.289412 1.7559474 -4.151270  0.0025
## Depth:Date2015-11        -0.003697 0.0035053 -1.054580  0.3191
## 
##  Correlation: 
##                          (Intr) HbttSl Depth  D2015- HbtS:D HS:D20
## HabitatSlope             -0.578                                   
## Depth                    -0.899  0.471                            
## Date2015-11              -0.639  0.162  0.543                     
## HabitatSlope:Depth        0.438 -0.865 -0.487 -0.006              
## HabitatSlope:Date2015-11  0.278 -0.351 -0.067 -0.446 -0.004       
## Depth:Date2015-11         0.565 -0.050 -0.629 -0.873  0.012  0.119
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.3107144 -0.6846311  0.1872838  0.6379184  1.0268931 
## 
## Residual standard error: 1.74337 
## Degrees of freedom: 16 total; 9 residual
dianostic_plot(f, y = "d1")

# Pairwise tests on time in canyon
fp <- gls(d1 ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Canyon"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: d1 ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Canyon") 
##        AIC      BIC    logLik
##   53.07896 50.01044 -21.53948
## 
## Coefficients:
##                       Value Std.Error   t-value p-value
## (Intercept)       2.4278546 1.9822100 1.2248221  0.2878
## Depth             0.0048909 0.0029043 1.6839867  0.1675
## Date2015-11       0.0651763 2.7864624 0.0233903  0.9825
## Depth:Date2015-11 0.0016711 0.0040535 0.4122730  0.7013
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.916              
## Date2015-11       -0.711  0.652       
## Depth:Date2015-11  0.657 -0.717 -0.915
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.31108114 -0.34801658  0.07781107  0.23840394  1.36959172 
## 
## Residual standard error: 1.586075 
## Degrees of freedom: 8 total; 4 residual
# Pairwise tests on time on slope
fp <- gls(d1 ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Slope"), method = "REML")
summary(fp)
## Generalized least squares fit by REML
##   Model: d1 ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Slope") 
##        AIC      BIC    logLik
##   49.84438 46.77586 -19.92219
## 
## Coefficients:
##                       Value Std.Error   t-value p-value
## (Intercept)       13.891177 1.6802767  8.267196  0.0012
## Depth              0.032658 0.0027563 11.848511  0.0003
## Date2015-11        1.120439 2.3732003  0.472122  0.6614
## Depth:Date2015-11 -0.012416 0.0038931 -3.189271  0.0332
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.935              
## Date2015-11       -0.708  0.662       
## Depth:Date2015-11  0.662 -0.708 -0.934
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -0.9211678 -0.6956915  0.1305477  0.5529969  1.0093048 
## 
## Residual standard error: 1.195204 
## Degrees of freedom: 8 total; 4 residual
# Normalized enivronmental data to mean zero and unit variance
es <- as.data.frame(scale(div[, c(-1:-23, -45)]))

# General linear Models
dat <- cbind(div[, c(1:23, 45:46)], es)
f <- gls(d1 ~ Speed+CN+Salinity+over20+TOC+transmissometer+Temperature, data=dat)
summary(f)
## Generalized least squares fit by REML
##   Model: d1 ~ Speed + CN + Salinity + over20 + TOC + transmissometer +      Temperature 
##   Data: dat 
##        AIC      BIC    logLik
##   83.59287 84.30785 -32.79644
## 
## Coefficients:
##                     Value Std.Error   t-value p-value
## (Intercept)     17.793312  1.275834 13.946416  0.0000
## Speed           -2.521972  3.606831 -0.699221  0.5042
## CN               1.804134  2.064000  0.874096  0.4075
## Salinity        -1.513705  1.527441 -0.991007  0.3507
## over20           4.958517  2.886433  1.717870  0.1241
## TOC              7.006896  3.499914  2.002020  0.0803
## transmissometer  7.665678  2.609729  2.937346  0.0188
## Temperature      0.784527  1.527689  0.513538  0.6215
## 
##  Correlation: 
##                 (Intr) Speed  CN     Salnty over20 TOC    trnsms
## Speed            0.000                                          
## CN               0.000 -0.451                                   
## Salinity         0.000 -0.396  0.026                            
## over20           0.000 -0.442  0.266  0.318                     
## TOC              0.000  0.533 -0.703  0.012  0.036              
## transmissometer  0.000  0.212  0.378 -0.159  0.288 -0.324       
## Temperature      0.000 -0.035 -0.118 -0.109 -0.298  0.103 -0.296
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.19558539 -0.56489342 -0.03804346  0.59914743  1.34412801 
## 
## Residual standard error: 5.103336 
## Degrees of freedom: 16 total; 8 residual
dianostic_plot(f, y = "d1")

# Model selection
ms <- dredge(f)
kable(ms[1:10,])
(Intercept) CN over20 Salinity Speed Temperature TOC transmissometer df logLik AICc delta weight
99 17.79331 NA 4.879562 NA NA NA 10.022825 7.454505 5 -40.38558 96.77115 0.0000000 0.27246451
107 17.79331 NA 5.549459 NA -2.2620965 NA 9.223318 6.723105 6 -38.11988 97.57310 0.8019455 0.18246084
103 17.79331 NA 4.452396 -1.637650 NA NA 9.355013 7.511234 6 -38.39295 98.11924 1.3480882 0.13885973
100 17.79331 0.7790588 4.946381 NA NA NA 9.316025 8.000803 6 -38.82715 98.98763 2.2164806 0.08995127
115 17.79331 NA 4.570064 NA NA 0.5675674 10.116509 7.244018 6 -39.03024 99.39382 2.6226629 0.07341857
101 17.79331 NA NA -2.002707 NA NA 6.878799 5.926211 5 -41.95698 99.91396 3.1428049 0.05660549
108 17.79331 1.9546452 6.178887 NA -3.8214211 NA 6.898847 7.589583 7 -36.03073 100.06146 3.2903028 0.05258111
97 17.79331 NA NA NA NA NA 7.416448 5.665290 4 -44.23032 100.09701 3.3258596 0.05165457
105 17.79331 NA NA NA -0.1746322 NA 7.327103 5.589863 5 -42.23020 100.46041 3.6892529 0.04307238
111 17.79331 NA 4.768912 -1.447233 -0.9010852 NA 9.114187 7.213292 7 -36.33128 100.66256 3.8914073 0.03893154
# Model averaging
ma <- model.avg(ms, fit=TRUE)
kable(summary(ma)$coefmat.full)
Estimate Std. Error Adjusted SE z value Pr(>|z|)
(Intercept) 17.7933125 1.3103804 1.4673931 12.1257983 0.0000000
over20 3.3515538 3.1051554 3.2746176 1.0234947 0.3060740
TOC 8.3350624 3.4647112 3.6755242 2.2677207 0.0233462
transmissometer 6.6081361 2.9169961 3.1085113 2.1258202 0.0335182
Speed -0.9541017 2.6378440 2.8096074 0.3395854 0.7341688
Salinity -0.5647541 1.1824400 1.2502060 0.4517288 0.6514643
CN 0.3858806 1.4250774 1.5115168 0.2552936 0.7984963
Temperature 0.2390884 0.8951809 0.9680737 0.2469734 0.8049288
# Best model
b <- get.models(ms, subset=1)[[1]]
summary(b)
## Generalized least squares fit by REML
##   Model: d1 ~ over20 + TOC + transmissometer + 1 
##   Data: dat 
##        AIC      BIC    logLik
##   90.77115 93.19569 -40.38558
## 
## Coefficients:
##                     Value Std.Error   t-value p-value
## (Intercept)     17.793313  1.203661 14.782664  0.0000
## over20           4.879561  2.262247  2.156953  0.0520
## TOC             10.022826  2.140121  4.683299  0.0005
## transmissometer  7.454505  1.951428  3.820026  0.0024
## 
##  Correlation: 
##                 (Intr) over20 TOC   
## over20           0.000              
## TOC              0.000  0.565       
## transmissometer  0.000  0.425 -0.291
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.6738077 -0.6453245 -0.1467390  0.8397799  1.3394624 
## 
## Residual standard error: 4.814643 
## Degrees of freedom: 16 total; 12 residual
dianostic_plot(b, y = "d1")

kable(summary(b)$tTable)
Value Std.Error t-value p-value
(Intercept) 17.793312 1.203661 14.782664 0.0000000
over20 4.879562 2.262247 2.156953 0.0519936
TOC 10.022825 2.140121 4.683299 0.0005293
transmissometer 7.454505 1.951428 3.820026 0.0024394
# Relative importance
kable(summary(ma)$sw)
x
TOC 0.9384656
transmissometer 0.9342902
over20 0.6929404
Speed 0.3792084
Salinity 0.3047365
CN 0.2645777
Temperature 0.2068886
ggplot()+
  geom_errorbar(data=div, aes(x=Zone, y=d1, ymin=d1, ymax=d1+d1.sd, fill=Date), position="dodge")+
  geom_bar(data=div, aes(x=Zone, y=d1, fill=Date), stat="identity", position="dodge", colour=gray(0, 0.2))+
  facet_wrap(~Habitat)+
  labs(x="Depth (m)", y=expression(exp(Shannon)))+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(axis.text.x=element_text(angle=30))

ggplot(data=div, aes(x=Abund, y=d1, colour=Habitat, shape=Date))+
  geom_point(size=5)+
  labs(x="Number of Individual", y=expression(exp(Shannon)))+
  scale_shape_manual(values=c(1, 19))+
  scale_x_log10()+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

q=2 or inverse Simpson diversity

ggplot(data=div, 
       aes(x=Depth, y=d2, ymin=d2-d2.sd, ymax=d2+d2.sd, colour=Habitat))+
  geom_pointrange(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), fatten = 10, 
                  position=position_jitter(width=10))+
  stat_smooth(method="lm", fill="gray60")+
  labs(x="Depth (m)", y=expression(1/Simpson))+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

# Linear regression
hl <- splitBy(~Habitat, div)
lapply(hl, FUN=function(x)summary(lm(d0~Depth, data=x)))
## $Canyon
## 
## Call:
## lm(formula = d0 ~ Depth, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8264 -1.0433  0.3256  0.6468  2.2704 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1.897619   1.280434   1.482  0.18885   
## Depth       0.008435   0.001862   4.529  0.00398 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.458 on 6 degrees of freedom
## Multiple R-squared:  0.7737, Adjusted R-squared:  0.736 
## F-statistic: 20.52 on 1 and 6 DF,  p-value: 0.003977
## 
## 
## $Slope
## 
## Call:
## lm(formula = d0 ~ Depth, data = x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.700 -1.224  1.172  1.754  3.034 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.049996   3.197072   4.707 0.003301 ** 
## Depth        0.044530   0.005245   8.491 0.000146 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.22 on 6 degrees of freedom
## Multiple R-squared:  0.9232, Adjusted R-squared:  0.9104 
## F-statistic: 72.09 on 1 and 6 DF,  p-value: 0.000146
# Linear Mixed-Effects Models
f <- lme(fixed = d2 ~ Habitat*Depth, random = ~1|Cruise, data=div, method = "REML", weights=varIdent(form=~1|Cruise), na.action = na.omit)
summary(f)
## Linear mixed-effects model fit by REML
##  Data: div 
##        AIC      BIC    logLik
##   93.05569 95.84096 -39.52785
## 
## Random effects:
##  Formula: ~1 | Cruise
##         (Intercept)  Residual
## StdDev:      1.8875 0.9230487
## 
## Variance function:
##  Structure: Different standard deviations per stratum
##  Formula: ~1 | Cruise 
##  Parameter estimates:
## OR1_1114 OR1_1126 
## 1.000000 4.324451 
## Fixed effects: d2 ~ Habitat * Depth 
##                        Value Std.Error DF  t-value p-value
## (Intercept)         1.005991 1.8614520 10 0.540433  0.6007
## HabitatSlope       10.030605 1.6951037 10 5.917399  0.0001
## Depth               0.004045 0.0016476 10 2.455143  0.0340
## HabitatSlope:Depth  0.017976 0.0026486 10 6.786951  0.0000
##  Correlation: 
##                    (Intr) HbttSl Depth 
## HabitatSlope       -0.412              
## Depth              -0.563  0.610       
## HabitatSlope:Depth  0.350 -0.925 -0.622
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.22863465 -0.27923353 -0.01072227  0.46556809  1.23245131 
## 
## Number of Observations: 15
## Number of Groups: 2
dianostic_plot(f, y = "d2")

# Adding time into linear model
f <- gls(d2 ~ Habitat+Depth+Date+Habitat:Depth+Habitat:Date++Depth:Date, data=div, method = "REML", na.action = na.omit)
summary(f)
## Generalized least squares fit by REML
##   Model: d2 ~ Habitat + Depth + Date + Habitat:Depth + Habitat:Date +      +Depth:Date 
##   Data: div 
##        AIC      BIC    logLik
##   93.83381 94.46935 -38.91691
## 
## Coefficients:
##                              Value Std.Error   t-value p-value
## (Intercept)               0.397666 2.3287455  0.170764  0.8686
## HabitatSlope             14.078104 2.9006756  4.853388  0.0013
## Depth                     0.006726 0.0033490  2.008375  0.0795
## Date2015-11               3.009544 3.1328611  0.960638  0.3649
## HabitatSlope:Depth        0.011559 0.0042507  2.719383  0.0263
## HabitatSlope:Date2015-11 -5.695068 2.1520283 -2.646372  0.0294
## Depth:Date2015-11        -0.004729 0.0041509 -1.139153  0.2876
## 
##  Correlation: 
##                          (Intr) HbttSl Depth  D2015- HbtS:D HS:D20
## HabitatSlope             -0.580                                   
## Depth                    -0.900  0.474                            
## Date2015-11              -0.581  0.112  0.488                     
## HabitatSlope:Depth        0.441 -0.867 -0.491  0.040              
## HabitatSlope:Date2015-11  0.246 -0.297 -0.044 -0.503 -0.045       
## Depth:Date2015-11         0.546 -0.030 -0.607 -0.867 -0.011  0.163
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.78271417 -0.34490935 -0.05709251  0.65523017  1.14464988 
## 
## Residual standard error: 2.034691 
## Degrees of freedom: 15 total; 8 residual
dianostic_plot(f, y = "d2")

# Pairwise tests on time in canyon
fp <- gls(d2 ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Canyon"), method = "REML", na.action = na.omit)
summary(fp)
## Generalized least squares fit by REML
##   Model: d2 ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Canyon") 
##        AIC      BIC    logLik
##   42.74967 38.24273 -16.37484
## 
## Coefficients:
##                        Value Std.Error   t-value p-value
## (Intercept)        2.1432307 0.7069329  3.031731  0.0562
## Depth              0.0039353 0.0010358  3.799281  0.0320
## Date2015-11       -0.7199292 1.0614052 -0.678279  0.5462
## Depth:Date2015-11  0.0009711 0.0014803  0.656015  0.5586
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.916              
## Date2015-11       -0.666  0.610       
## Depth:Date2015-11  0.641 -0.700 -0.913
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.0541847 -0.4668356  0.2182491  0.5270924  0.7154221 
## 
## Residual standard error: 0.5656559 
## Degrees of freedom: 7 total; 3 residual
# Pairwise tests on time on slope
fp <- gls(d2 ~ Depth+Date+Depth:Date, data=subset(div, Habitat=="Slope"), method = "REML", na.action = na.omit)
summary(fp)
## Generalized least squares fit by REML
##   Model: d2 ~ Depth + Date + Depth:Date 
##   Data: subset(div, Habitat == "Slope") 
##        AIC      BIC    logLik
##   54.86898 51.80046 -22.43449
## 
## Coefficients:
##                       Value Std.Error   t-value p-value
## (Intercept)       11.953891  3.148838  3.796286  0.0192
## Depth              0.022711  0.005165  4.396892  0.0117
## Date2015-11        2.344428  4.447377  0.527148  0.6260
## Depth:Date2015-11 -0.013559  0.007296 -1.858469  0.1366
## 
##  Correlation: 
##                   (Intr) Depth  D2015-
## Depth             -0.935              
## Date2015-11       -0.708  0.662       
## Depth:Date2015-11  0.662 -0.708 -0.934
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -1.07185858 -0.59480830  0.08580928  0.41487048  1.27675417 
## 
## Residual standard error: 2.239812 
## Degrees of freedom: 8 total; 4 residual
# Normalized enivronmental data to mean zero and unit variance
es <- as.data.frame(scale(div[, c(-1:-23, -45)]))

# General linear Models
dat <- cbind(div[, c(1:23, 45:46)], es)
dat <- subset(dat, !is.na(d2))

f <- gls(d2 ~ Speed+CN+Salinity+over20+TOC+transmissometer+Temperature, data=dat)
summary(f)
## Generalized least squares fit by REML
##   Model: d2 ~ Speed + CN + Salinity + over20 + TOC + transmissometer +      Temperature 
##   Data: dat 
##        AIC      BIC    logLik
##   72.73228 72.24547 -27.36614
## 
## Coefficients:
##                     Value Std.Error   t-value p-value
## (Intercept)     13.036296  1.171612 11.126801  0.0000
## Speed           -2.977353  2.897295 -1.027632  0.3383
## CN               1.175638  1.877470  0.626182  0.5511
## Salinity        -0.768100  1.312970 -0.585009  0.5769
## over20           2.145716  2.965550  0.723547  0.4928
## TOC              5.976091  3.923424  1.523183  0.1715
## transmissometer  3.498991  3.369328  1.038483  0.3336
## Temperature      2.006542  1.710734  1.172913  0.2792
## 
##  Correlation: 
##                 (Intr) Speed  CN     Salnty over20 TOC    trnsms
## Speed           -0.047                                          
## CN               0.235 -0.441                                   
## Salinity        -0.181 -0.331 -0.154                            
## over20           0.309 -0.403  0.481 -0.001                     
## TOC             -0.345  0.446 -0.775  0.265 -0.420              
## transmissometer  0.386  0.055  0.580 -0.380  0.632 -0.693       
## Temperature     -0.344  0.043 -0.408  0.185 -0.605  0.543 -0.680
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -0.97278158 -0.57274201 -0.07474042  0.28981548  1.52831874 
## 
## Residual standard error: 4.080262 
## Degrees of freedom: 15 total; 7 residual
dianostic_plot(f, y = "d2")

# Model selection
ms <- dredge(f)
kable(ms[1:10,])
(Intercept) CN over20 Salinity Speed Temperature TOC transmissometer df logLik AICc delta weight
57 12.53786 NA NA NA -3.083436 3.246244 8.826432 NA 5 -33.98130 84.62927 0.000000 0.27509021
113 12.70040 NA NA NA NA 2.557248 9.139065 2.307743 5 -34.66336 85.99339 1.364117 0.13907878
49 12.44263 NA NA NA NA 3.091712 11.498001 NA 4 -37.06933 86.13865 1.509380 0.12933541
121 12.63367 NA NA NA -2.475018 2.978137 8.304831 1.025991 6 -32.29422 87.08843 2.459163 0.08044048
51 12.39226 NA -1.3093468 NA NA 3.350219 10.649754 NA 5 -35.31075 87.28817 2.658899 0.07279515
59 12.55664 NA 0.3409891 NA -3.266563 3.188100 8.888672 NA 6 -32.41345 87.32690 2.697630 0.07139902
99 13.23076 NA 2.8543120 NA NA NA 7.589568 4.910599 5 -35.37138 87.40942 2.780150 0.06851304
50 12.47611 -1.1579460 NA NA NA 3.111661 11.987125 NA 5 -35.50365 87.67396 3.044688 0.06002463
97 13.04371 NA NA NA NA NA 6.737601 3.469259 4 -37.94471 87.88943 3.260157 0.05389409
58 12.53785 0.0314748 NA NA -3.112640 3.247166 8.787833 NA 6 -32.78119 88.06239 3.433115 0.04942920
# Model averaging
ma <- model.avg(ms, fit=TRUE)
kable(summary(ma)$coefmat.full)
Estimate Std. Error Adjusted SE z value Pr(>|z|)
(Intercept) 12.7513474 1.1086330 1.2373638 10.3052529 0.0000000
Speed -1.4015404 2.1755509 2.2980184 0.6098909 0.5419341
Temperature 2.1040819 1.7089824 1.7823283 1.1805243 0.2377917
TOC 8.4626290 3.2238698 3.3931072 2.4940647 0.0126290
transmissometer 1.5411507 2.4539745 2.5533411 0.6035820 0.5461216
over20 0.3639211 1.6404629 1.7557809 0.2072702 0.8357988
CN 0.0565012 0.9125623 0.9740824 0.0580045 0.9537450
Salinity -0.1599696 0.6500895 0.7018150 0.2279370 0.8196952
# Best model
b <- get.models(ms, subset=1)[[1]]
summary(b)
## Generalized least squares fit by REML
##   Model: d2 ~ Speed + Temperature + TOC + 1 
##   Data: dat 
##       AIC      BIC   logLik
##   77.9626 79.95208 -33.9813
## 
## Coefficients:
##                 Value Std.Error   t-value p-value
## (Intercept) 12.537862 0.9237224 13.573193  0.0000
## Speed       -3.083436 1.6194890 -1.903956  0.0834
## Temperature  3.246244 1.0254869  3.165564  0.0090
## TOC          8.826432 1.8430503  4.789035  0.0006
## 
##  Correlation: 
##             (Intr) Speed  Tmprtr
## Speed       -0.054              
## Temperature -0.099 -0.079       
## TOC         -0.159  0.761  0.233
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -1.3644549 -0.5659801 -0.1446742  0.4021507  1.9872177 
## 
## Residual standard error: 3.51181 
## Degrees of freedom: 15 total; 11 residual
dianostic_plot(b, y = "d2")

kable(summary(b)$tTable)
Value Std.Error t-value p-value
(Intercept) 12.537862 0.9237224 13.573193 0.0000000
Speed -3.083436 1.6194890 -1.903956 0.0833901
Temperature 3.246244 1.0254869 3.165564 0.0089894
TOC 8.826432 1.8430503 4.789035 0.0005632
# Relative importance
kable(summary(ma)$sw)
x
TOC 0.9498616
Temperature 0.7023827
Speed 0.4654285
transmissometer 0.4485296
over20 0.2951074
CN 0.2046720
Salinity 0.1732685
ggplot()+
  geom_errorbar(data=div, aes(x=Zone, y=d2, ymin=d2, ymax=d2+d2.sd, fill=Date), position="dodge")+
  geom_bar(data=div, aes(x=Zone, y=d2, fill=Date), stat="identity", position="dodge", colour=gray(0, 0.2))+
  facet_wrap(~Habitat)+
  labs(x="Depth (m)", y=expression(1/Simpson))+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(axis.text.x=element_text(angle=30))

ggplot(data=div, aes(x=Abund, y=d2, colour=Habitat, shape=Date))+
  geom_point(size=5)+
  labs(x="Number of Individual", y=expression(1/Simpson))+
  scale_shape_manual(values=c(1, 19))+
  scale_x_log10()+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large

Correlation bettwen exponential Shannon (q=1) and environmental factor

p1 <- ggplot(data=div, 
       aes(x=Speed, y=d1, ymin=d1-d1.sd, ymax=d1+d1.sd))+
  geom_pointrange(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), fatten=10)+
  stat_smooth(method="lm", fill="gray60", colour="gray50")+
  labs(x="Modeled Current Velocity (m/s)", y="Shannon Diversity")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(legend.position="none")

cor.test(formula=~d1+Speed, data=div)
## 
##  Pearson's product-moment correlation
## 
## data:  d1 and Speed
## t = -5.1374, df = 14, p-value = 0.0001509
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9309941 -0.5216518
## sample estimates:
##        cor 
## -0.8083337
p2 <- ggplot(data=div, 
       aes(x=over20, y=d1, ymin=d1.sd, ymax=d1.sd))+
  geom_pointrange(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), fatten=10)+
  stat_smooth(method="lm", fill="gray60", colour="gray50")+
  labs(x="Modeled Duration of Erosion (hr)", y="Shannon Diversity")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(legend.position="none")

cor.test(formula=~d1+over20, data=div)
## 
##  Pearson's product-moment correlation
## 
## data:  d1 and over20
## t = -3.2722, df = 14, p-value = 0.005561
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8700823 -0.2413675
## sample estimates:
##        cor 
## -0.6583081
p3 <- ggplot(data=div, 
       aes(x=transmissometer, y=d1, ymin=d1.sd, ymax=d1.sd))+
  geom_pointrange(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), fatten=10)+
  stat_smooth(method="lm", fill="gray60", colour="gray50")+
  labs(x="Light Transmission (%)", y="")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(legend.position="none")

cor.test(formula=~d1+transmissometer, data=div)
## 
##  Pearson's product-moment correlation
## 
## data:  d1 and transmissometer
## t = 5.6207, df = 14, p-value = 6.31e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5732749 0.9401779
## sample estimates:
##       cor 
## 0.8324254
p4 <- ggplot(data=div, 
       aes(x=TOC, y=d1, ymin=d1.sd, ymax=d1.sd))+
  geom_pointrange(aes(fill=Habitat), pch=21, colour=gray(0, 0.2), fatten=10)+
  stat_smooth(method="lm", fill="gray60", colour="gray50")+
  labs(x="Total Organic Carbon (%)", y="")+
  scale_fill_viridis(discrete = TRUE)+
  scale_color_viridis(discrete = TRUE)+
  theme_bw() %+replace% large %+replace% theme(legend.position="none")

cor.test(formula=~d1+TOC, data=div)
## 
##  Pearson's product-moment correlation
## 
## data:  d1 and TOC
## t = 6.6347, df = 14, p-value = 1.124e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6605280 0.9545758
## sample estimates:
##       cor 
## 0.8710333
plot_grid(p1, p4, ncol=2, align = "h", axis="b")

plot_grid(p2, p3, ncol=2, align = "h", axis="b")

names(div)[5:7] <- c("0D", "1D", "2D")
names(div)[9:12] <- c("0D.sd", "1D.sd", "2D.sd", "n")
kable(div[,1:12])
Cruise Station Deployment S.obs 0D 1D 2D S.obs.sd 0D.sd 1D.sd 2D.sd n
OR1_1114 GC1 2 10.333333 5.044333 4.340000 3.799333 1.154700 0.5664453 0.4900786 0.4334932 3
OR1_1114 GC2 1 8.666667 5.051667 4.162000 3.530000 2.886751 1.6741736 1.2831177 1.0488641 3
OR1_1114 GC3 1 15.333333 7.609667 5.844333 4.687000 1.527525 0.1904215 0.2993164 0.4304289 3
OR1_1114 GC4 1 15.000000 9.224000 7.602000 6.402667 1.732051 1.3414723 1.1495617 1.0471339 3
OR1_1114 GS1 1 34.333333 29.899333 23.530667 18.911333 3.511885 4.7629195 3.4796423 3.5620248 3
OR1_1114 GS2 1 35.666667 37.067333 27.878333 21.163333 15.373137 18.0976182 13.7723390 10.5275511 3
OR1_1114 GS3 1 44.000000 48.809667 37.214333 28.523667 2.645751 1.2724093 0.5531784 0.4451812 3
OR1_1114 GS4 1 48.000000 54.342333 41.369667 30.976667 5.196152 11.5947815 8.9482463 5.4122922 3
OR1_1126 GC1 1 8.333333 5.269000 4.045000 3.313667 2.081666 1.8636523 1.1184065 0.7470939 3
OR1_1126 GC2 1 4.000000 6.638333 7.861000 NA 1.000000 4.0120309 5.4285158 NA 3
OR1_1126 GC3 1 6.333333 5.596333 4.711667 4.040667 2.309401 1.6827597 1.4319917 1.2075766 3
OR1_1126 GC4 3 9.000000 13.219000 9.976000 6.954000 NA NA NA NA 1
OR1_1126 GS1 1 32.000000 26.753667 19.868000 15.354333 6.244998 9.1817361 7.4757539 6.2717982 3
OR1_1126 GS2 1 36.666667 32.665333 25.002000 19.556333 5.859465 8.2876435 7.1758573 6.9579645 3
OR1_1126 GS3 2 39.666667 40.076333 30.185000 23.473333 6.658328 14.3105101 9.9867666 7.4935269 3
OR1_1126 GS4 1 40.000000 53.711000 31.103000 19.659000 3.000000 4.5137059 4.5842007 5.4827099 3